from IPython.display import IFrame, HTML
# Embed the assignment PDF in the notebook for reference.
IFrame("GWDA_assignment-2.pdf", width="100%", height=400)
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import pandas
import pylab
from pycbc.filter import highpass, resample_to_delta_t, matched_filter, sigma, match, sigmasq
from pycbc.catalog import Merger
from pycbc.frame import read_frame
import pycbc.psd
from pycbc.psd import interpolate, inverse_spectrum_truncation, aLIGOZeroDetHighPower, welch
from pycbc.waveform import get_td_waveform, get_fd_waveform
from pycbc.conversions import mass1_from_mchirp_q
import pycbc.types
Here I tried to use both the local data from the download, and the data accessible from the Merger function. The time series look very different, which is surprising considering they should pretty much be the same, but they do result in a very similar looking PSD and mass values. I decided to go with the grid data as it was more practical and some of the code in the hints showed a slight use of the Merger data, and so I thought that I would just fully use the time series given by it.
The next two cells simply import the data, filter it with a highpass filter to remove low-frequency noise that is not useful for this analysis, and plot it using Plotly for the interactive plots. All of the plots are available as exterior HTML files that can be embedded into any webpage and remain interactive.
# Load GW170817 from the PyCBC catalog and condition the strain:
# highpass at 20 Hz (matches the low-frequency cutoff used throughout),
# downsample to 2048 Hz, crop 2 s at each edge to drop filter transients,
# and keep a frequency-domain copy (stilde) for matched filtering.
merger = Merger("GW170817")
strain, stilde = {}, {}
for detector in ['H1', 'L1']:
    # This comment is for the use of the local data. Kept in case I want to switch back.
    # ts = read_frame(f"{detector[0]}-{detector}_LOSC_CLN_4_V1-1187007040-2048.gwf",
    #                 f"{detector}:LOSC-STRAIN",
    #                 start_time=merger.time-224,
    #                 end_time=merger.time + 32,
    #                 check_integrity=False)
    strain[detector] = resample_to_delta_t(highpass(merger.strain(detector), 20.0), 1.0/2048)
    strain[detector] = strain[detector].crop(2,2)  # discard filter-corrupted edges
    stilde[detector] = strain[detector].to_frequencyseries()
# Interactive plot of the conditioned strain for both detectors, exported to
# a standalone HTML file and embedded back into the notebook.
fig1 = go.Figure()
for det, trace_label in (('H1', "LIGO_Hanford"), ('L1', "LIGO_Livingston")):
    fig1.add_trace(go.Scattergl(x=strain[det].sample_times,
                                y=strain[det],
                                mode='lines',
                                name=trace_label))
fig1.update_layout(xaxis_title='Time (s)')
fig1.write_html("Q1_1_LIGO_Hanford_raw.html")
IFrame(src='./Q1_1_LIGO_Hanford_raw.html', width="100%", height=500)
In this part we simply calculate the power density spectrum of the Livingston and Hanford observatories from our data. In this case we use 2 second data samples for the Welch method, which will create our PSD.
The last method, inverse_spectrum_truncation, just ensures that the PSD does not include frequencies below 20 Hz, as this is the value of the highpass filter that we applied to the data in the last section.
We then simply plot the output psds together.
# Estimate each detector's PSD with Welch's method (2 s segments),
# interpolate it onto the strain's frequency grid, and truncate the inverse
# spectrum so the PSD has no support below the 20 Hz highpass applied above.
# Fix: the loop variable was misspelled `detctor`; renamed for readability.
psds = {}
for detector in ['L1', 'H1']:
    psds[detector] = interpolate(strain[detector].psd(2), strain[detector].delta_f)
    psds[detector] = inverse_spectrum_truncation(psds[detector],
                                                 int(2 * strain[detector].sample_rate),
                                                 low_frequency_cutoff=20.0,
                                                 trunc_method='hann')
# Overlay the two detectors' PSDs on log-log axes and export to HTML.
fig2 = go.Figure()
for det, trace_label in (("H1", "LIGO_Hanford"), ("L1", "LIGO_Livingston")):
    fig2.add_trace(go.Scattergl(x=psds[det].sample_frequencies,
                                y=psds[det],
                                mode='lines',
                                name=trace_label))
fig2.update_xaxes(type="log", range=[1.3, 3])
fig2.update_yaxes(type="log", range=[-47, -42])
fig2.update_layout(xaxis_title='Frequency [Hz]',
                   yaxis_title='Strain^2/Hz',
                   title="GW170817 Power Spectral Density")
fig2.write_html("Q1_2_Power_Spectral_Density.html")
IFrame(src='./Q1_2_Power_Spectral_Density.html', width="100%", height=500)
The next step consisted in calculating the estimated mass of the objects in the binary system (we assume both have the same mass). With help, we know the mass is in the range of 1.3 to 1.5. We create a loop that goes through every value in this range, with a specific step value, and see which mass has the highest SNR, which would indicate that this mass is the correct value. In this case we focused on the H1 detector as it's the one we used in the next steps.
# this is to choose which detector we look at when measuring the mass. We can do best SNR of both (detector = ["H1","L1"])
# or in this case, we choose the detector that we will be using below, meaning H1:
detector = ["H1"]
# Grid search over equal component masses: for each trial mass, build a
# TaylorF2 frequency-domain template, matched-filter it against the data,
# and keep the mass that yields the loudest network SNR near the merger.
masses = np.arange(1.3, 1.5, .01)
hmax, smax, tmax, mmax, nsnr = None, {}, {}, 0, 0
snrs = []
for m in masses:
    max_snr, max_time = {}, {}
    for ifo in detector:
        # Template on the same frequency grid as the data so they can be filtered together.
        hp, hc = get_fd_waveform(approximant="TaylorF2",
                                 mass1=m, mass2=m,
                                 f_lower=20, delta_f=stilde[ifo].delta_f)
        hp.resize(len(stilde[ifo]))
        snr = matched_filter(hp, stilde[ifo],
                             psd=psds[ifo],
                             low_frequency_cutoff=20.0)
        # Only search within +/- 1 s of the catalog merger time.
        snr = snr.time_slice(merger.time - 1, merger.time + 1)
        _, idx = snr.abs_max_loc()
        max_snr[ifo] = snr[idx]  # complex peak SNR (amplitude and phase)
        max_time[ifo] = float(idx) / snr.sample_rate + snr.start_time
    # Quadrature sum over detectors gives the network SNR.
    network_snr = (abs(np.array(list(max_snr.values()))) ** 2.0).sum()** 0.5
    snrs.append(max_snr)
    if network_snr > nsnr:
        # Remember the best template, its per-detector peak time/SNR, and the mass.
        tmax, hmax, mmax, smax = max_time, hp, m, max_snr
        nsnr = network_snr
print("We found the best Mass1=Mass2 was %2.2f solar masses (detector frame)" % mmax)
We found the best Mass1=Mass2 was 1.38 solar masses (detector frame)
nsnr  # echo the best network SNR found by the mass search
9.558125973927565
We found that the mass is 1.38 solar masses and that the snr for that is at 9.56. If we do the same for the L1 detector, we find a value of 1.36 solar masses and snr around 34.
The next steps are a little more complicated in my understanding, as the code above uses a waveform defined through its frequency domain, but all of the examples that I could find on PyCBC used time series to create the waveform (get_td_waveform instead of get_fd_waveform). As such I tried to convert the code I found into the frequency domain, but this creates complex values and messes around with the time position of the merger. It was also sometimes a little tricky for me to understand if I should use strain or stilde (which are the time and frequency series respectively) in certain contexts or if cyclic_time_shift still applied, and so some weird behaviour probably spawns from that.
It's a shame that this merger and the one given as example are so different, as it was difficult for someone not initiated to understand what variables should change or not (like filter lengths and values, delta_f, what differences there are between approximants, because those are complicated, etc...). Giving a merger that has more similarities would probably have made the exercise less dense, as it is so much theory and library info to learn in theoretically one week.
The code below creates a frequency domain waveform with the mass found above and then shifts the data so that the merger can be found at the beginning of the array. This is the purpose of the cyclic_time_shift, which treats the data as a circle and can shift the start around. "get_td_waveform" creates the series so that the merger is at zero, so that is what we have to give the function. This is however where I'm not sure, because I use this on "get_fd_waveform" instead and I don't know if that is allowed. There were no errors so I left it like this, but the template is now a complex number series instead of real numbers...
The code then goes on and makes the matched filter, once again using the template we created and the frequency series of our data. The filtering process can corrupt the beginning and end of the data, so we just chop away 8 seconds at the beginning and 4 at the end to remove these undesired structures. We then just plot the SNR, taking into account that it's a complex series, and find the maximum value.
# Build the best-fit template (m1 = m2 = 1.38 Msun) in the frequency domain,
# rotate its merger to the start of the array, and matched-filter against H1.
# Fix: the original used stilde[ifo], silently relying on the loop variable
# `ifo` leaking out of the previous cell's search loop; use "H1" explicitly.
m = 1.38
hp, hc = get_fd_waveform(approximant="TaylorF2",
                         mass1=m, mass2=m,
                         f_lower=20, delta_f=stilde["H1"].delta_f)
hp.resize(len(stilde["H1"]))
# cyclic_time_shift treats the series as periodic; shifting by start_time
# places the merger (t = 0 in get_*_waveform output) at the array start.
template = hp.cyclic_time_shift(hp.start_time)
snr = matched_filter(template, stilde["H1"],
                     psd=psds["H1"], low_frequency_cutoff=20)
# The filter corrupts the edges: drop 8 s at the start and 4 s at the end.
snr = snr.crop(4 + 4, 4)
# Plot |SNR(t)| for H1, save the interactive figure, and report the loudest sample.
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=snr.sample_times, y=abs(snr),
                            mode='lines', name="LIGO_Hanford"))
fig2.update_layout(xaxis_title='Time (s)',
                   yaxis_title='Signal-to-noise',
                   title="GW170817 - H1 - SNR Plot")
fig2.write_html("Q1_3_SNR_Plot.html")
# Locate the maximum of the SNR magnitude; keep the complex value for phase.
snr_magnitude = abs(snr).numpy()
peak = snr_magnitude.argmax()
snrp = snr[peak]
time = snr.sample_times[peak]
print("We found a signal at {}s with SNR {}".format(time, abs(snrp)))
We found a signal at 1187008882.4433594s with SNR 9.558126354250904
IFrame(src='./Q1_3_SNR_Plot.html', width="100%", height=500)  # embed the saved SNR plot
We now will try, thanks to the SNR value and all the above steps, to fit the template onto the noisy data, as the time, amplitude, and phase of the SNR peak tell us how to align them together.
The first step is to shift the template to the right spot and then scale it so that it would have an SNR of 1 if it was in data. This is done by using the sigma function. The next step is to scale the template amplitude and phase to the highest value.
The next steps in the process is to whiten the template and the data and then bandpass them between 30-300 Hz. In this way, any signal that is in the data is transformed in the same way that the template is and will match. The next step is simply to select the merge area data from both the template and the data and superpose them in a plot to see the match.
I am pretty sure this is not what the plot should be like, but I never managed to get better results except by choosing a mass much higher than what we found (36 solar masses). I however looked at the merger and it is an event of two neutron stars, with a mass approximately equal to what we found, so I don't know where it went wrong.
########## PREPING THE TEMPLATE ###########
# Shift the template so its merger lands at the measured peak time, then
# normalise it to unit SNR in this noise (sigma) and scale by the complex
# peak SNR so its amplitude AND phase match the data.
dt = time - stilde["H1"].start_time
aligned = template.cyclic_time_shift(dt)
aligned /= sigma(aligned, psd=psds["H1"], low_frequency_cutoff=20.0)
aligned = (aligned.to_frequencyseries() * snrp).to_timeseries()
aligned.start_time = strain["H1"].start_time
########### WHITENING ###############
# Divide by sqrt(PSD) so every frequency bin carries equal noise power.
white_data = (strain["H1"].to_frequencyseries() / psds["H1"]**0.5).to_timeseries()
# apply a smoothing of the turnon of the template to avoid a transient
# from the sharp turn on in the waveform.
tapered = aligned.highpass_fir(30, 512, remove_corrupted=False)
white_template = (tapered.to_frequencyseries() / psds["H1"]**0.5).to_timeseries()
########### BANDPASS FILTERING ################
# Keep roughly 30-200 Hz in both data and template so they compare by eye.
white_data = white_data.highpass_fir(30., 512).lowpass_fir(200, 512)
white_template = white_template.highpass_fir(30, 512).lowpass_fir(200, 512)
########### CHOOSING THE MERGER AREA ##################
white_data = white_data.time_slice(merger.time - 10, merger.time + 2)
white_template = white_template.time_slice(merger.time - 10, merger.time + 2)
# Overlay the whitened data and the scaled whitened template around the merger.
fig2 = go.Figure()
for series, trace_label in ((white_data, "Data"), (white_template, "Template")):
    fig2.add_trace(go.Scattergl(x=series.sample_times,
                                y=series,
                                mode='lines',
                                name=trace_label))
fig2.update_layout(xaxis_title='Time',
                   yaxis_title='Amplitude',
                   title="Merger Event")
The last step of this part is to look at the QTransforms of the whitened data. These are very simple to implement with PyCBC and can be seen below
# Q-transform spectrograms of the whitened data before and after subtracting
# the best-fit signal; each spectrogram is saved to its own HTML file.
subtracted = strain["H1"] - aligned
# Plot the original data and the subtracted signal data
for data, title, filename in [(strain["H1"], 'Original H1 Data', 'Original_H1_Data'),
                              (subtracted, 'Signal Subtracted from H1 Data', 'Signal_Subtracted_from_H1_Data')]:
    t, f, p = data.whiten(4, 4).qtransform(.001,
                                           logfsteps=100,
                                           qrange=(8, 8),
                                           frange=(20, 512))
    fig = go.Figure(data=go.Heatmap(
        z=p**0.5,
        x=t,
        y=f,
        colorscale='Viridis'))
    fig.update_yaxes(type="log")
    fig.update_xaxes(range=[merger.time - 0, merger.time + 2])
    fig.update_layout(title="QTransform - " + title,
                      xaxis_title='Time (s)',
                      yaxis_title='Frequency (Hz)')
    # BUG FIX: interpolate the per-iteration `filename` into the output path;
    # the old literal wrote both plots to one file, so the IFrames below
    # could not find "Q1_7_Original_H1_Data.html" etc.
    fig.write_html(f"Q1_7_{filename}.html")
    del fig
IFrame(src='./Q1_7_Original_H1_Data.html', width="100%", height=500)
IFrame(src='./Q1_7_Signal_Subtracted_from_H1_Data.html', width="100%", height=500)
In this part of the question we investigate the correlation between close waveforms, or more specifically, how well they match.
The first step is to create the base waveform of known mass. This is the one that will be used as a base. The code then goes into a loop for a certain range of masses, and at each iteration creates a new waveform with that new mass. The two waveforms are then both resized to the same length. The aLIGO PSD is then generated and used to create the match between the two waveforms. These matches are then stored in an array and plotted, showing the highest similarity between waveforms of the same mass, as expected.
# Match (noise-weighted overlap, maximised over time and phase) between a
# fixed EOBNRv2 10+10 Msun waveform and TaylorT4 waveforms of nearby masses.
f_low = 30
sample_rate = 4096
# Base waveform
hp, hc = get_td_waveform(approximant = "EOBNRv2",
                         mass1 = 10,
                         mass2 = 10,
                         f_lower = f_low,
                         delta_t = 1.0/sample_rate)
matches = []
masses = np.arange(5, 15, 0.2)
for mass in masses:
    sp, sc = get_td_waveform(approximant = "TaylorT4",
                             mass1 = mass,
                             mass2 = mass,
                             f_lower = f_low,
                             delta_t = 1.0/sample_rate)
    # Resize the waveforms to the same length
    tlen = max(len(sp),len(hp))
    sp.resize(tlen)
    hp.resize(tlen)
    # Design-sensitivity aLIGO PSD on the same frequency grid as the waveforms.
    delta_f = 1.0/sp.duration
    flen = tlen // 2 + 1
    psd = aLIGOZeroDetHighPower(flen, delta_f, f_low)
    # match returns (match value, time-shift index); keep only the value.
    m,i = match(hp, sp, psd=psd, low_frequency_cutoff = f_low)
    matches.append(m)
# Scatter plot of match versus component mass; the peak sits at the base mass.
fig1 = go.Figure()
match_trace = go.Scatter(x=masses,
                         y=matches,
                         mode='markers',
                         name="LIGO_Hanford")
fig1.add_trace(match_trace)
fig1.update_layout(xaxis_title='Mass',
                   yaxis_title='Match',
                   title="Correlation Between GW Waveform and Nearby Template")
fig1.write_html("Q1_8_waveform_correlation.html")
IFrame(src='./Q1_8_waveform_correlation.html', width="100%", height=500)
from IPython.display import IFrame, HTML
# Re-embed the assignment PDF at the start of question 2.
IFrame("GWDA_assignment-2.pdf", width="100%", height=400)
Same steps as above to create the PSD: this cell loads and prepares the data into time and frequency domains.
# Load the 4096 Hz noise time series (columns: time, strain), wrap it in a
# PyCBC TimeSeries, condition it like the real data (20 Hz highpass,
# resample to 2048 Hz), and estimate its PSD as before.
# Fix: removed the dead `strain = d[:,1]` assignment that was immediately
# overwritten by the resampled series below.
d = np.load("noise_ts_4096Hz.npy")
time = d[:, 0]
dt = time[1] - time[0]  # sample spacing read off the time column
data = pycbc.types.TimeSeries(d[:, 1], delta_t=dt)
strain = resample_to_delta_t(highpass(data, 20.0), 1.0/2048)
stilde = strain.to_frequencyseries()
psds = interpolate(strain.psd(2), stilde.delta_f)
# NOTE(review): cutoff here is 15 Hz while the highpass above is 20 Hz;
# the earlier cells used 20 Hz for both — confirm which was intended.
psds = inverse_spectrum_truncation(psds,
                                   int(2 * strain.sample_rate),
                                   low_frequency_cutoff=15.0,
                                   trunc_method='hann')
# Plot the noise PSD on log-log axes and embed the saved HTML file.
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=psds.sample_frequencies, y=psds,
                            mode='lines', name="LIGO_Hanford"))
fig2.update_layout(xaxis=dict(type="log", range=[1.3, 3], title='Frequency [Hz]'),
                   yaxis=dict(type="log", range=[-47, -42], title='Strain^2/Hz'),
                   title="Power Spectral Density")
fig2.write_html("Q2_1_Power_Spectral_Density.html")
IFrame(src='./Q2_1_Power_Spectral_Density.html', width="100%", height=500)
Also as in the earlier question, we create the waveform and match filter it to get the SNR. In this case we get an SNR of 5.18, meaning that we don't have a signal in our data set, only noise, which allows us to verify that the noise is white, meaning that it should distribute as a gaussian with zero mean. We also plot the SNR.
# Matched-filter a 10+10 Msun TaylorF2 template against the pure-noise data;
# the resulting peak SNR (~5) is consistent with no signal being present.
flow = 30 #Hz
hp, hc = pycbc.waveform.get_fd_waveform(approximant="TaylorF2",
                                        mass1=10,
                                        mass2=10,
                                        f_lower=flow,
                                        delta_f=stilde.delta_f)
hp.resize(len(stilde))
snr = pycbc.filter.matched_filter(hp, stilde, psd=psds,
                                  low_frequency_cutoff=flow)
# Remove regions corrupted by filter wraparound (keep the middle half).
snr = snr[len(snr) // 4: len(snr) * 3 // 4]
# Plot |SNR(t)| for the noise-only data and report its maximum.
snr_mag = abs(snr)
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=snr.sample_times, y=snr_mag,
                            mode='lines', name="LIGO_Hanford"))
fig2.update_layout(xaxis_title='time (s)',
                   yaxis_title='signal-to-noise ratio',
                   title="SNR Plot")
fig2.write_html("Q2_2_SNR_Plot.html")
print('Maximum SNR', max(snr_mag))
Maximum SNR 5.182224578145249
IFrame(src='./Q2_2_SNR_Plot.html', width="100%", height=500)  # embed the saved SNR plot
This part will investigate the distribution of the noise in our data. First we whiten the data we have and interpolate it and applying the Welch method.
# Rough PSD via Welch's method on the raw data, interpolated onto the data's
# frequency grid, then whiten by dividing out sqrt(PSD).
psds = interpolate(welch(data), 1.0 / data.duration)
white_data = (data.to_frequencyseries() / psds**0.5).to_timeseries()
Now we want to represent this whitened data as a distribution, and as such we can also calculate the standard deviation $\sigma$ and the mean $\mu$ as well as plot the distribution. As we can see from this, the mean is very close to zero and the distribution looks exactly like a gaussian. The distribution is also normalized before plotting.
# Sample statistics of the whitened noise (the mean should be near zero).
sigma = np.std(white_data)
mu = np.mean(white_data)
print(f"sigma = {sigma} ; mu = {mu}")
sigma = 60.47948790014771 ; mu = 0.052291781117840296
# Histogram of the whitened samples, normalised to a probability.
# Fix: the title previously hard-coded sigma/mu values, which would silently
# go stale if the input data changed; format them from the computed values.
fig = go.Figure(data=[go.Histogram(x=white_data, histnorm='probability')])
fig.update_xaxes(range=[-200, 200])
fig.update_layout(xaxis_title='noise',
                  yaxis_title='probability',
                  title=f"Whitened data histogram: σ = {sigma:.7f} and μ = {mu:.8f}")
fig.write_html("Q2_3_whiteNoise_Hist.html")
IFrame(src='./Q2_3_whiteNoise_Hist.html', width="100%", height=500)
We now are going to demonstrate that the noise is not distributed as stationary gaussian noise. To do this, instead of using the whole data to look at the SNR, we will do the same steps as above but for smaller data chunks and then look at that distribution. In this cell we cut the data into 1000 slices that we analyze.
As we can see from the distribution, when applied to smaller chunks the SNRs drawn from the data are not similar at all, and as such the noise is not stationary but varies throughout the dataset.
# Stationarity check: estimate the optimal SNR (sqrt of sigmasq) of a fixed
# 3+3 Msun template in each of 1000 consecutive chunks, each with its own
# Welch PSD; stationary noise would give a near-constant value per chunk.
hp, hc = get_fd_waveform(approximant="TaylorF2",
                         mass1=3,
                         mass2=3,
                         delta_f=0.001,
                         distance = 500,
                         f_lower=20.0,
                         f_final = 2048.0)
nb_slice = 1000
slice_size = int(len(data)/nb_slice)
SNRs =[]
for i in range(nb_slice):
    data_slice = data[slice_size*i:slice_size*(i+1)]
    # Per-chunk PSD, interpolated onto the template's frequency grid.
    psds = interpolate(welch(data_slice), hp.delta_f)
    SNR = (pycbc.filter.sigmasq(hp, psds))**0.5
    SNRs.append(SNR)
# NOTE(review): `sigma` here shadows the pycbc.filter.sigma function imported
# at the top of the file; harmless in this execution order, but worth renaming.
sigma = np.std(SNRs)
mu = np.mean(SNRs)
print("sigma = " + str(sigma),"; mu = " + str(mu))
# Histogram of the per-chunk SNR estimates; the wide spread shows the noise
# is not stationary.
# Fix: the title previously hard-coded sigma/mu values, which would silently
# go stale if the input data changed; format them from the computed values.
fig = go.Figure(data=[go.Histogram(x=SNRs, histnorm='probability')])
fig.update_layout(xaxis_title='SNR',
                  yaxis_title='probability',
                  title=f"Estimated SNR histogram: σ = {sigma:.7f} and μ = {mu:.7f}")
fig.write_html("Q2_4_estSNR_Hist.html")
IFrame(src='./Q2_4_estSNR_Hist.html', width="100%", height=500)
from IPython.display import IFrame, HTML
# Re-embed the assignment PDF at the start of question 3.
IFrame("GWDA_assignment-2.pdf", width="100%", height=400)
In this section we investigate the horizon distance of different gravitational wave detectors.
In this cell we import the prebuilt PSD data from the PyCBC library. Here we import data from aLIGO and restrict its frequency range by setting to infinity all the frequencies that are not in the wanted range. We then plot the PSD of the observatory.
import pycbc.psd
# Analytic design-sensitivity aLIGO PSD on a 1/16 Hz grid up to 2048 Hz.
flow = 4.0
delta_f = 1.0 / 16
flen = int(2048.0/ (delta_f)) + 1
psd_ligo = pycbc.psd.aLIGOZeroDetHighPower(flen, delta_f, flow)
psd_ligo.data[:int(flow/delta_f)] = np.inf #set the value outside the frequency range to infinity
psd_ligo.data[-1] = np.inf  # also exclude the last (Nyquist) bin
# aLIGO PSD on log-log axes, saved and embedded as interactive HTML.
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=psd_ligo.sample_frequencies, y=psd_ligo,
                            mode='lines', name="aLIGO"))
fig2.update_layout(xaxis=dict(type="log", range=[1, 3.306], title='Frequency [Hz]'),
                   yaxis=dict(type="log", range=[-47, -44], title='Amplitude'),
                   title="aLIGO Power Spectral Density")
fig2.write_html("Q3_1_PSD_aLIGO.html")
IFrame(src='./Q3_1_PSD_aLIGO.html', width="100%", height=500)
We now create a waveform that can take multiple mass values to analyze how the SNR detected by the detector changes depending on the mass of the binary system. We only keep the values that are > 8 as they represent an actual event and scale them by dividing by 8. We then plot those values, called the horizon distance.
# Horizon distance vs total mass for aLIGO: for each equal-mass system at
# 1000 Mpc compute the optimal SNR, keep detectable systems (SNR > 8), and
# express the result as a horizon distance (SNR/8, in Gpc at this reference
# distance).
flow = 4.0
x, y = [], []
for component_mass in np.arange(5, 550, 1):
    hp, hc = get_fd_waveform(approximant="TaylorF2",
                             mass1=component_mass,
                             mass2=component_mass,
                             f_lower=flow,
                             delta_f=delta_f,
                             distance=1000)
    optimal_snr = pycbc.filter.sigmasq(hp, psd_ligo) ** 0.5
    if optimal_snr > 8:
        y.append(optimal_snr / 8)
        x.append(2 * component_mass)
# Horizon distance curve for aLIGO, saved and embedded as interactive HTML.
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=x, y=np.asarray(y),
                            mode='lines', name="aLIGO"))
fig2.update_layout(xaxis_title='Total Mass (m1+m2)',
                   yaxis_title='Horizon Distance (Gpc)',
                   title="Horizon Distance - aLIGO")
fig2.write_html("Q3_2_HD_aLIGO.html")
IFrame(src='./Q3_2_HD_aLIGO.html', width="100%", height=500)
The horizon distance is a good tool to analyze how efficiently a GW observatory can detect signals from different binary sources, with different masses. In our example above we only used waveforms of the same distance, but that can also be varied, along with the mass, to create a map of the sensitivity of an observatory. In this case, we can see that the SNR for low masses is quite low, as well as for very massive objects. These are the limitations of aLIGO for a distance of 1000 Mpc. For objects at this distance, aLIGO is most efficient at detecting signals emitted by binary systems of combined mass ~100 solar masses.
In this part, we just repeat the same as for part 1, but for the Einstein telescope.
flow = 4.0 # set up the lower cut off frequency
delta_f = 1.0 / 16
flen = int(2048.0/ (delta_f)) + 1
# Einstein Telescope design PSD (P1600143), on the same grid as aLIGO above.
psd_Eins = pycbc.psd.EinsteinTelescopeP1600143(flen, delta_f, flow)
psd_Eins.data[:int(flow/delta_f)] = np.inf #set the value outside the frequency range to infinity
psd_Eins.data[-1] = np.inf  # also exclude the last (Nyquist) bin
# Einstein Telescope PSD plot.
# Fix: the trace name said "LIGO_Hanford" and the title was generic —
# copy-paste leftovers from the aLIGO cell; label the instrument actually
# plotted (the output file is Q3_3_PSD_Einstein.html).
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=psd_Eins.sample_frequencies,
                            y=psd_Eins,
                            mode='lines',
                            name="Einstein Telescope"))
fig2.update_xaxes(type="log", range=[0.6, 3.306])
fig2.update_yaxes(type="log", range=[-49, -44])
fig2.update_layout(xaxis_title='Frequency [Hz]',
                   yaxis_title='Amplitude',
                   title="Einstein Telescope Power Spectral Density")
fig2.write_html("Q3_3_PSD_Einstein.html")
IFrame(src='./Q3_3_PSD_Einstein.html', width="100%", height=500)
# Same horizon-distance scan as for aLIGO, but against the Einstein
# Telescope PSD: optimal SNR per equal-mass system at 1000 Mpc, keep
# SNR > 8, convert to horizon distance via SNR/8.
flow = 4.0
x, y = [], []
for component_mass in np.arange(5, 550, 1):
    hp, hc = get_fd_waveform(approximant="TaylorF2",
                             mass1=component_mass,
                             mass2=component_mass,
                             f_lower=flow,
                             delta_f=delta_f,
                             distance=1000)
    optimal_snr = pycbc.filter.sigmasq(hp, psd_Eins) ** 0.5
    if optimal_snr > 8:
        y.append(optimal_snr / 8)
        x.append(2 * component_mass)
# Horizon distance curve for the Einstein Telescope.
# Fix: the trace name ("LIGO_Hanford") and title ("Horizon Distance - aLIGO")
# were copy-pasted from the aLIGO cell; label the instrument actually plotted
# (the output file is Q3_4_HD_Einstein.html).
fig2 = go.Figure()
fig2.add_trace(go.Scattergl(x=x,
                            y=y,
                            mode='lines',
                            name="Einstein Telescope"))
fig2.update_layout(xaxis_title='Total Mass (m1+m2)',
                   yaxis_title='Horizon Distance (Gpc)',
                   title="Horizon Distance - Einstein Telescope")
fig2.write_html("Q3_4_HD_Einstein.html")
IFrame(src='./Q3_4_HD_Einstein.html', width="100%", height=500)
We can see that compared to aLIGO at the same distance, the Einstein telescope has a much wider range of visibility and a maximum for systems with four times the mass of what aLIGO can detect.